Mailto: julio.cordero@medma.uni-heidelberg.de
-Instead of an expression of genes, we propose an enrichment score. Which combines the motif score of the TFs and the log10-Pvalue of the motif in the chromatin state. Each Cluster identified represents a group of PEAKS which contain a specific signature of MOTIFS from TFs. - Allow the identification of TFs in the neighborhood Zone. To understand the mechanism of how TFs, cooperate among them. -This Script was used with the Motifs associated with Histone marks. However, it could also be modified to input Motifs associated with ATAC-seq or ChIP-seq. In the case of ATAC-seq, the motifs coming from footprinting analysis or Homer motifs search are highly recommended.
workdir = "./"
setwd(workdir)
PTHA1="../03OUTPUT/"
dir.create(PTHA1)
PORT=PTHA1
TRY="_"
TRY1="02Ia_"
PROJECT="03b_TFinZONE_at_04dpci"
PVALUE=0.05
#### FOR VOLCANO ###
upcol<- "#B2182B" # magenta from PiyG
nc<- "#000000" # black
downcol<- "grey" # green from PiyG
CC= c(downcol, "#F7F7F7",upcol)
BASIC_COL="black"
CO3<- c("lightgrey","#B2182B")
CO_ALU3=c("#d7191c","#d8b365","#542788","grey", "#91bfdb", "grey","grey")
NAME33<- c("Mo_Name","Consensus","P-value","Log_P-value","q-value_Benja","Nor_of_Tar","Per_of_Tar","No_of_Tar_Backg","Per_of_Tar_Backg")
NAME34<- c("HOmer_NAME",NAME33)
mat_m<- data.frame("mask_500_FIND")
## Here you write the Path to the
WORK1="../01DATA_ORI/08MO_onP/"
#WORK1="Y:/002ZF/000FIGURES_ANALY/00GITHUB_01HIS_Chroma_Factors/01HIS_Chrom_Factors_TEST/04MOtifs_Analy_as_SC_MOAC_Fig3_5/02Fig4_MO_MOAC_14dpci/14dATA_ORI/08MOTIFS_on_PEAKS/"
NAME_G2 <-data.frame(list.files(path=WORK1,pattern="*_FIND"))
colnames(NAME_G2)<- "ENH4"
#NAME_G2$GROUP <- gsub("CORR1_14d_", "", NAME_G2$ENH4)
NAME_G2$GROUP <- gsub("*_mask_500_FIND", "", NAME_G2$ENH4)
### this is important to check the SYMBOL
NAME_G22<- data.frame(c("00A_Ia","01Ia_A","02U_Ea","03U_Ia","04U_Pa"))
NAME_G2c<- as.character(t(NAME_G2[,2]))
#WORK11A11="../14dATA_ORI/07aMO_forMOAC/08TSS_Enh_w_14dpci_fHeatmap_ALL.xlsx"
WORK11A11="../01DATA_ORI/07aMO_forSeurat_ANALY/08TSS_Enh_w_04dpci_fHeatmap_ALL.xlsx"
MAT_1A= read_excel(WORK11A11)
#MAT_1A= read.delim(WORK11A11)
colnames(MAT_1A)[1]<- "SYMBOL"
dim(MAT_1A)
## [1] 338 11
##### Set resolution for CLustering
RES=0.5
## sequential:
## - args: function (..., envir = parent.frame())
## - tweaked: FALSE
## - call: NULL
## sequential:
## - args: function (..., workers = 16, envir = parent.frame())
## - tweaked: TRUE
## - call: plan("sequential", workers = 16)
##
## 00A_Ia_04dcpi 01Ia_A_04dcpi 02U_Ea_04dcpi 03U_Ia_04dcpi 04U_Pa_04dcpi
## 39886 287817 14945 35034 37650
## [1] 3
b7<- AC_RE_VOL_SC_META
rownames(b7)<- b7$PositionID2
mat_c1<- mat_cm_TMM_BOTH_SC[,rownames(b7)]
rownames(mat_c1)<- mat_cm_TMM_BOTH_SC$SYMBOL
dim(mat_c1)
## [1] 240 4647
dim(b7)
## [1] 4647 5
smartseq2 <- CreateSeuratObject(mat_c1,project = "04d", meta.data = b7)
## png
## 2
arrangeQC_TF <- ggarrange(EC00,EC001, ncol = 2,nrow =1,common.legend = T, align = c("hv"),legend="none")
print(arrangeQC_TF)
grid.arrange( plot1,plot2, ncol = 1,nrow = 2)
## Centering and scaling data matrix
## PC_ 1
## Positive: Tbx5, MYB, Hnf6b, EWS, bHLHE41, MafA, Cux2, Hoxa10, LRF, Elk1
## HNF6, Npas4, Elk4, CHR, Tbx20, Zfp57, PSE, GATA, HINFP, ZNF7
## DMRT1, GLIS3, IRF3, DMRT6, RORg, MafB, TCFL2, GRHL2, MafF, THRb
## Negative: Twist2, SCL, Nanog, Sox3, Hoxa9, NeuroG2, TCF4, BHLHA15, RARa, Sox10
## Olig2, Smad3, Bapx1, Hoxd12, Sox21, Atoh1, Sox6, Smad2, Tcf21, ERG
## PR, MyoG, Smad4, Hoxd11, Hoxa11, Erra, Sox4, Fox, CEBP, Tbet
## PC_ 2
## Positive: NPAS, Tbx5, Gata4, HIC1, Gata6, Hoxd10, MNT, Gata2, Hnf6b, Gata1
## MafA, Max, STAT4, NPAS2, Sox15, Hoxa10, Sox9, Zac1, Hoxa9, Cux2
## Gfi1b, Smad4, BMAL1, LRF, Sox2, Erra, MITF, Hoxc9, Rfx6, BATF
## Negative: Foxf1, FOXK1, Foxo3, FoxL2, Foxo1, FOXP1, FOXK2, FOXA1, Foxa2, MYB
## Npas4, Foxa3, FOXM1, BMYB, Elk4, Eomes, ELF1, Elk1, ETS, FoxD3
## AMYB, bHLHE41, HNF6, CHR, Foxh1, ETV4, PSE, Oct4, Tbr1, Fli1
## PC_ 3
## Positive: GABPA, ETV1, ETV4, Elf4, Fli1, Etv2, ETS1, ERG, ELF3, EHF
## ELF5, ELF1, EWS, SPDEF, Elk1, ETS, Elk4, SpiB, Tbx5, Bach2
## LRF, Hnf6b, MafA, Cux2, Fosl2, Hoxa10, PRDM1, Fos, IRF8, STAT4
## Negative: Hoxd11, Hoxa11, FoxL2, Foxo3, Foxf1, FOXK1, Tcf21, Fox, Foxa2, FOXA1
## Cdx2, MyoG, TCF4, FOXK2, BHLHA15, Twist2, Ap4, HOXB13, Foxa3, NeuroG2
## Atoh1, FOXP1, CDX4, Hoxa13, Myf5, Hoxd13, SCL, ZBTB18, MyoD, Olig2
## PC_ 4
## Positive: Atf3, BATF, Fra2, Fra1, Fos, JunB, Fosl2, Bach2, NFE2L2, Bach1
## Nrf2, Foxa2, FOXA1, Foxa3, FoxL2, Foxo3, FOXK2, FOXK1, Foxf1, FOXP1
## Pdx1, FoxD3, Foxo1, Mef2c, FOXM1, Mef2b, Mef2d, VDR, ARE, Nur77
## Negative: BHLHA15, Tcf21, Twist2, Atoh1, MyoG, TCF4, NeuroG2, Myf5, Ap4, MyoD
## ZBTB18, Olig2, Atf7, Atf2, CREB5, SCL, JunD, ELF3, ELF5, Atf1
## Etv2, CLOCK, Elf4, CRE, Tbx21, EHF, SPDEF, ETS1, bHLHE40, NPAS2
## PC_ 5
## Positive: CLOCK, BMAL1, NPAS2, Max, NPAS, MNT, USF1, bHLHE40, Usf2, MITF
## bHLHE41, THRb, RUNX, Meis1, Tgif1, MafA, EBF1, GRHL2, RUNX2, Tcf7
## FOXK2, ERRg, MafF, ZNF652, Olig2, RUNX1, SCL, Atf3, Foxo3, Fra1
## Negative: Hoxd13, HOXB13, Hoxa13, Hoxd10, Hoxa9, Hoxd11, Unknown, Hoxd12, Cdx2, Hoxa11
## CDX4, Nanog, Sox4, Sox3, Sox15, Sox10, Sox7, STAT4, Sox2, STAT6
## Bapx1, Sox6, Hoxa10, Bcl6, Stat3, TEAD4, STAT5, STAT1, Hnf6b, Mef2b
## Plot Pincipal component analysis (PCA)
grid.arrange( P1, ncol = 1,nrow = 1)
print(P2)
## png
## 2
set.seed(1)
smartseq2 <- FindNeighbors(smartseq2, dims = 1:10)
smartseq2 <- FindClusters(smartseq2, resolution = RES, random.seed= 1, algorithm=1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4647
## Number of edges: 146263
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8798
## Number of communities: 12
## Elapsed time: 0 seconds
CO<-brewer.pal(n = 12, name = "Paired")
COb<-brewer.pal(n = 8, name = "Dark2")
CO2b<-c("#E66100","#000000","#0affc2")
CO2<-c("#b2182b","#ef8a62","#67a9cf")
##EDIT Paired color
COe2<- c("#88CCEE", "#332288","#AA4499", "#44AA99", "#999933", "#882255", "#661100", "#6699CC","#888888","#000000","#77ddcc","#df65b0","#CC79A7","#332288", "#E66100", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#A6CEE3", "#FDBF6F", "#FF7F00","#CAB2D6","#F0E442","#B15928","#7570B3","#E7298A","#666666","#c241f8" ,"mediumaquamarine","#882E72",'#1965B0',"#bf812d")
A1<- c('#b2182b','#d6614d','#f4a582')
A2<- c('#8c510a','#bf812d','#F6C141')
A3<- c("#882E72", "#AE76A3","#D1BBD7")
A4<- c('#01665e','#90C987','#CAE0AB')
A5<- c ('#1965B0','#4393c3',"#92c5de")
CO_ALU2=c("#d7191c","#d8b365","#542788","#abd9e9", "#2c7bb6", "#4d4d4d","grey")
CO_ALU=c("#d7191c","#542788","#d8b365","#abd9e9", "#2c7bb6", "#4d4d4d","grey")
CO_ALU3=c("#d7191c","#d8b365","#542788","grey", "#91bfdb", "grey","grey")
COe<- c(A1,A2,A3,A4,A5)
smartseq2 <- RunUMAP(smartseq2, dims = 1:30, n.neighbors = 30)
smartseq2 <- RunTSNE(smartseq2, dims = 1:20, check_duplicates = FALSE)
##### These PLots are printed later in the Script. here are to check #####
#DimPlot(smartseq2, reduction = "umap", label = T,cols = COe2,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne", label = T,cols = COe2,pt.size=0.7)
#DimPlot(smartseq2, reduction = "umap",group.by ='GROUP',cols = CO_ALU,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne",group.by ='GROUP',cols = COe,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne", label = T)
#DimPlot(smartseq2, reduction = "tsne",group.by ='seurat_clusters', split.by = 'GROUP',cols = COe2,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne",group.by ='GROUP', split.by = 'seurat_clusters',cols = CO_ALU3,pt.size=0.7)
#DimPlot(smartseq2, reduction = "umap",group.by ='GROUP',cols = CO_ALU3,pt.size=0.7)
#DimPlot(smartseq2, reduction = "tsne",group.by ='GROUP',cols = CO_ALU3,pt.size=0.7)
{pdf(file=paste(PTHA1,"01Umap_tsne_",PROJECT,"CLU_PC10_Dim30",".pdf",sep=""), width=8, height=3)
P00a<- DimPlot(smartseq2, reduction = "umap", label = T,cols = COe2,pt.size=0.7)
P01a<- DimPlot(smartseq2, reduction = "umap", label = F,cols = COe2,pt.size=0.7)
P00at<- DimPlot(smartseq2, reduction = "tsne", label = T,cols = COe2,pt.size=0.7)
P01at<- DimPlot(smartseq2, reduction = "tsne", label = F,cols = COe2,pt.size=0.7)
P03<-DimPlot(smartseq2, reduction = "umap",group.by ='seurat_clusters', split.by = 'GROUP',cols = COe2,pt.size=0.7)
P03a<- DimPlot(smartseq2, reduction = "umap",group.by ='GROUP', split.by = 'seurat_clusters',cols = CO_ALU3,pt.size=0.7)
P03t<-DimPlot(smartseq2, reduction = "tsne",group.by ='seurat_clusters', split.by = 'GROUP',cols = COe2,pt.size=0.7)
P03at<- DimPlot(smartseq2, reduction = "tsne",group.by ='GROUP', split.by = 'seurat_clusters',cols = CO_ALU3,pt.size=0.7)
arrange1 <- ggarrange(P00a,P01a, ncol = 2,nrow =1,common.legend = T, align = c("hv"),legend="right")
arrange1t <- ggarrange(P00at,P01at, ncol = 2,nrow =1,common.legend = T, align = c("hv"),legend="right")
arrange1c <- ggarrange(P03, ncol = 1,nrow =1,common.legend = T, align = c("hv"),legend="right")
print(arrange1t)
print(arrange1)
}
dev.off()
## png
## 2
{pdf(file=paste(PTHA1,"01Umap_tsne_",PROJECT,"CLU_PC10_Dim30_by_STAGE",".pdf",sep=""), width=12, height=3)
arrange1bt <- ggarrange(P03t, ncol = 1,nrow =1,common.legend = T, align = c("hv"),legend="right")
arrange1ca <- ggarrange(P03a, ncol = 1,nrow =1,common.legend = T, align = c("hv"),legend="right")
arrange1cat <- ggarrange(P03at, ncol = 1,nrow =1,common.legend = T, align = c("hv"),legend="right")
print(arrange1bt)
print(arrange1c)
print(arrange1cat)
}
dev.off()
## png
## 2
print(arrange1t)
print(arrange1)
print(arrange1bt)
print(arrange1c)
print(arrange1cat)
## png
## 2
print(arrange3)
print(arrange3t)
print(arrange2)
grid.arrange( P20,P201, ncol = 1,nrow = 2)
grid.arrange( P05ah_bino1, ncol = 1,nrow = 1)
grid.arrange( P05ah_bino, ncol = 1,nrow = 1)
##Save Seurat Object - Important for further modification of the Plots. You do not need to start from the start of the script.
DATA_SC<- data.frame(smartseq2@assays[["RNA"]]@counts@i)
DATA_SC<- data.frame(smartseq2@assays[["RNA"]]@counts@i)
DATA_SC<- GetAssay(smartseq2,assay = "RNA")
DATA_SC1<- data.frame(DATA_SC@scale.data)
DATA_SC2<- data.frame(DATA_SC@scale.data)
#dim(DATA_SC1)
META_SEU<- data.frame(smartseq2@meta.data)
colnames(DATA_SC1)<- META_SEU$PositionID
colnames(META_SEU)[4]<- "SYMBOL_TF"
GENES001<- c("Twist2","Smad4", "Nanog","Smad2","Fos","Sox9","Sox10","JunB")
GENES002 <- c("Pitx1","Isl1", "Mef2b","FOXA1","TEAD3","GATA3", "Gata6", "Mef2b")
GENES003 <- c("Eomes","Oct4","Foxh1","CUX1","Bapx1", "Elk4","Foxo1","Fli1")
GENES004<- c("Rbp7","Rgcc", "Fabp4","Egfl7","Flt1", "Cd36","Nrp1","Tpm1", "Rbp1")
LE=10
SI=10
YL3="Norm binding of TFs"
XL3="clusters"
P1 <- VlnPlot(smartseq2, GENES001, stack = TRUE, sort = F, flip = T,pt.size=0.02)+geom_point(size = .4,colour = "darkgrey") +scale_fill_manual(values = COe2)+
theme(legend.position = "none",plot.title= element_text(color=BASIC_COL, size=LE, face="bold"), axis.text.y= element_text(size=LE,face="bold"),axis.text.x = element_text(size=SI, face="bold"), axis.title.x =element_text(color=BASIC_COL, size=LE, face="bold"), axis.title.y = element_text(color=BASIC_COL, size=LE, face="bold")) +labs(title=paste(PROJECT, "SEVERAL_TFs",sep="" ) , x=XL3, y = YL3)
P1CM <- VlnPlot(smartseq2, GENES002, stack = TRUE, sort = F, flip = T,pt.size=0.02)+geom_point(size = .4,colour = "darkgrey") +scale_fill_manual(values = COe2)+
theme(legend.position = "none",plot.title= element_text(color=BASIC_COL, size=LE, face="bold"), axis.text.y= element_text(size=LE,face="bold"),axis.text.x = element_text(size=SI, face="bold"), axis.title.x =element_text(color=BASIC_COL, size=LE, face="bold"), axis.title.y = element_text(color=BASIC_COL, size=LE, face="bold")) +labs(title=paste(PROJECT, "SEVERAL_TFs",sep="" ) , x=XL3, y = YL3)
P1CM3 <- VlnPlot(smartseq2, GENES003, stack = TRUE, sort = F, flip = T,pt.size=0.02)+geom_point(size = .4,colour = "darkgrey") +scale_fill_manual(values = COe2)+
theme(legend.position = "none",plot.title= element_text(color=BASIC_COL, size=LE, face="bold"), axis.text.y= element_text(size=LE,face="bold"),axis.text.x = element_text(size=SI, face="bold"), axis.title.x =element_text(color=BASIC_COL, size=LE, face="bold"), axis.title.y = element_text(color=BASIC_COL, size=LE, face="bold")) +labs(title=paste(PROJECT, "SEVERAL_TFs",sep="" ) , x=XL3, y = YL3)
#P1CM3
#print(P1CM)
pdf(file=paste(PTHA1,"09_", PROJECT,"_CLU_vs_Ex_VIol_groups_STACKED",".pdf",sep=""), width=10, height=15)
print(P1)
arrange1CM <- ggarrange(P1CM, ncol = 1,nrow =1,common.legend = T, align = c("hv"),legend="top")
arrange1CM3 <- ggarrange(P1CM3, ncol = 1,nrow =1,common.legend = T, align = c("hv"),legend="top")
dev.off()
## png
## 2
print(arrange1CM)
print(arrange1CM3)
## png
## 2
print(arrange1CM41)
print(arrange1CM42)
print(arrange1CM43)
## png
## 2
print(arrange12)
print(P04b12)
print(P04c12)
#FeaturePlot(smartseq2, features =GENES001,min.cutoff=0,max.cutoff=1,cols = CO3)
P05a<-FeaturePlot(smartseq2, features =GENES001,min.cutoff=0,max.cutoff=1,cols = CO3)
P05b<-FeaturePlot(smartseq2, features =GENES002,min.cutoff=0,max.cutoff=1,cols = CO3)
P05c<-FeaturePlot(smartseq2, features =GENES003,min.cutoff=0,max.cutoff=1,cols = CO3)
{pdf(file=paste(PTHA1,"09_",PROJECT,"_CLU_vs_Exp_UMMAP_groups",".pdf",sep=""),width=8, height=6)
print(P05a)
print(P05b)
print(P05c)
}
dev.off()
## png
## 2
P05a1<-FeaturePlot(smartseq2, features =GENES001,min.cutoff=0,max.cutoff=1,cols = CO3,reduction="tsne")
P05b1<-FeaturePlot(smartseq2, features =GENES002,min.cutoff=0,max.cutoff=1,cols = CO3,reduction="tsne")
P05c1<-FeaturePlot(smartseq2, features =GENES003,min.cutoff=0,max.cutoff=1,cols = CO3,reduction="tsne")
{pdf(file=paste(PTHA1,"09_",PROJECT,"_CLU_vs_Ex_TSNE_groups",".pdf",sep=""),width=8, height=6)
print(P05a1)
print(P05b1)
print(P05c1)
}
dev.off()
## png
## 2
print(P05a)
print(P05b)
print(P05c)
print(P05a1)
print(P05b1)
print(P05c1)